Устанавливаем необходимые библиотки
library(tidyverse)
library(DESeq2)
library(pheatmap)
library(RColorBrewer)
library(clusterProfiler)
library(biomaRt)
library(org.Hs.eg.db)
library(EnhancedVolcano)
library(GenomicRanges)
library(msigdbr)
library(multiMiR)
library(miRBaseConverter)
library(enrichplot)
library(vsn)
library(rvest)
library(patchwork)
library(dbplyr)
setwd("/Users/dariakilina/GitHub/Micro-RNA_cardiology")
coldata <- read_tsv("data/phenotableV.tsv", show_col_types = FALSE)
coldata$type <- as.factor(coldata$type)
coldata$patient <- as.factor(coldata$patient)
coldata$condition <- as.factor(coldata$condition)
coldata <- as.data.frame(coldata)
rownames(coldata) <- coldata$sample
coldata
counts <- read.csv("data/miR.Counts.csv", header = TRUE, sep = ",")
counts <- column_to_rownames(counts, var = "miRNA")
#counts <- round(counts) # если используем нормализованные данные
head(counts)
colnames(counts) <- gsub("^X", "", colnames(counts))
common_samples <- intersect(colnames(counts), coldata$sample)
counts <- counts[, c(counts$miRNA, common_samples)]
counts <- counts[, rownames(coldata)] #ранжирую по колонки в counts так же как и названия строк в coldata
head(counts)
anno <- read.csv("data/annotation.report.csv", header = TRUE, sep = ",")
anno$Sample.name.s. <- gsub("-", ".", anno$Sample.name.s.)
anno <- anno[, -c(2:5, 7, 15)]
common_samples <- intersect(anno$Sample.name.s., coldata$sample)
anno <- anno[anno$Sample.name.s. %in% common_samples, ]
anno <- anno[match(rownames(coldata), anno$Sample.name.s.), ] #ранжирую по колонки в counts так же как и названия строк в coldata
anno
anno_long <- anno %>%
pivot_longer(cols = -Sample.name.s., names_to = "RNA_Type", values_to = "Count")
plt <- ggplot(anno_long, aes(x = Sample.name.s., y = Count, fill = RNA_Type)) +
geom_bar(stat = "identity") +
theme_minimal() +
labs(x = "Sample", y = "Read Count") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_brewer(palette = "Set3") # Красивые цвета
print(plt)
ggsave("./pictures/barplot_alldataset_no_normalised.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
anno_long <- anno %>%
rowwise() %>%
mutate(across(-Sample.name.s., ~ . / sum(c_across(-Sample.name.s.)))) %>%
ungroup() %>%
pivot_longer(cols = -Sample.name.s., names_to = "RNA_Type", values_to = "Proportion")
plt <- ggplot(anno_long, aes(x = Sample.name.s., y = Proportion, fill = RNA_Type)) +
geom_bar(stat = "identity") +
theme_minimal() +
labs(x = "Sample", y = "Proportion") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_brewer(palette = "Set3")
plt
ggsave("./pictures/barplot_alldataset_normalised.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
coldata$condition <- relevel(coldata$condition, ref = "before")
modelMatrix <- model.matrix(~type*condition + patient, data = coldata)
modelMatrix
(Intercept) type150 conditionafter patient17 patient29 type150:conditionafter
29.1p16_S39_R1_001 1 0 0 0 1 0
15.1p16_S33_R1_001 1 0 0 0 0 0
17.1p16_S35_R1_001 1 0 0 1 0 0
29.1p150_S40_R1_001 1 1 0 0 1 0
15.1p150_S34_R1_001 1 1 0 0 0 0
17.1p150_S36_R1_001 1 1 0 1 0 0
29.7p16_S41_R1_001 1 0 1 0 1 0
15.7p16_S31_R1_001 1 0 1 0 0 0
17.7p16_S37_R1_001 1 0 1 1 0 0
15.7p150_S32_R1_001 1 1 1 0 0 1
29.7p150_S42_R1_001 1 1 1 0 1 1
17.7p150_S38_R1_001 1 1 1 1 0 1
attr(,"assign")
[1] 0 1 2 3 3 4
attr(,"contrasts")
attr(,"contrasts")$type
[1] "contr.treatment"
attr(,"contrasts")$condition
[1] "contr.treatment"
attr(,"contrasts")$patient
[1] "contr.treatment"
dds <- DESeqDataSetFromMatrix(countData = counts,
colData = coldata,
design = ~type*condition + patient)
converting counts to integer mode
dds$condition <- relevel(dds$condition, ref = "before")
dds
class: DESeqDataSet
dim: 913 12
metadata(1): version
assays(1): counts
rownames(913): Hsa-Let-7-P1a_3p* Hsa-Let-7-P1a_5p/P2a1_5p/P2a2_5p ... Hsa-Mir-9851_3p Hsa-Mir-9851_5p*
rowData names(0):
colnames(12): 29.1p16_S39_R1_001 15.1p16_S33_R1_001 ... 29.7p150_S42_R1_001 17.7p150_S38_R1_001
colData names(4): sample condition type patient
dim(dds)
[1] 913 12
smallestGroupSize <- 3
keep <- rowSums(counts(dds) >= 10) >= smallestGroupSize
dds <- dds[keep,]
dim(dds)
[1] 292 12
Run Differential Expression Analysis for all dataset
dds <- DESeq(dds, fitType = "parametric")
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
dds
class: DESeqDataSet
dim: 292 12
metadata(1): version
assays(4): counts mu H cooks
rownames(292): Hsa-Let-7-P1a_5p/P2a1_5p/P2a2_5p Hsa-Let-7-P1b_5p ... Hsa-Mir-96-P2_5p Hsa-Mir-96-P3_5p
rowData names(38): baseMean baseVar ... deviance maxCooks
colnames(12): 29.1p16_S39_R1_001 15.1p16_S33_R1_001 ... 29.7p150_S42_R1_001 17.7p150_S38_R1_001
colData names(5): sample condition type patient sizeFactor
plotDispEsts(dds)
raw_counts <- counts(dds, normalized = FALSE)
normalized_counts <- counts(dds, normalized = TRUE)
df <- data.frame(
Sample = rep(colnames(dds), 2),
Counts = c(colSums(raw_counts), colSums(normalized_counts)),
Type = rep(c("Raw", "Normalized"), each = ncol(dds))
)
plt <- ggplot(df, aes(x = Sample, y = Counts, fill = Type)) +
geom_bar(stat = "identity", position = "dodge") +
theme_minimal() +
labs(title = "Counts before and after normalization", x = "Sample", y = "Total Counts") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
plt
ggsave("./pictures/Counts before and after normalization.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
rlog трансформация
rlt <- rlog(dds) #rlog Transformation
meanSdPlot(assay(rlt))
vsd <- varianceStabilizingTransformation(dds, blind=FALSE)
meanSdPlot(assay(vsd)) #показывает, как изменяется стандартное отклонение в зависимости от среднего значения экспрессии
PCA plot
pcaData <- plotPCA(rlt, intgroup=c("condition", "type", "patient"), returnData = TRUE)
using ntop=500 top features by variance
percentVar <- round(100 * attr(pcaData, "percentVar"))
pcaData$condition_type <- paste(pcaData$condition, pcaData$type, sep = "_")
ggplot(pcaData, aes(PC1, PC2, shape = patient, color = condition_type)) +
geom_point(size = 3) +
xlab(paste0("PC1: ", percentVar[1], "%")) +
ylab(paste0("PC2: ", percentVar[2], "%")) +
coord_fixed() +
theme_bw() +
ggtitle("PCA plot for all dataset before removing donor effect")+
scale_color_brewer(palette = "Set2")
assay(rlt) <- limma::removeBatchEffect(assay(rlt),
batch = colData(dds)[,'patient'])
pcaData <- plotPCA(rlt, intgroup=c("condition", "type", "patient"), returnData = TRUE)
using ntop=500 top features by variance
percentVar <- round(100 * attr(pcaData, "percentVar"))
pcaData$condition_type <- paste(pcaData$condition, pcaData$type, sep = "_")
plt <- ggplot(pcaData, aes(PC1, PC2, shape = patient, color = condition_type)) +
geom_point(size = 3) +
xlab(paste0("PC1: ", percentVar[1], "%")) +
ylab(paste0("PC2: ", percentVar[2], "%")) +
coord_fixed() +
theme_bw() +
scale_color_brewer(palette = "Set2")
plt
ggsave("./pictures/PCA plot for all dataset after removing donor effect.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
Plot a heatmap of 50 most expressed genes Этот heatmap отражает уровни экспрессии генов, а не разницу между группами. Цвета не означают up- или down-регуляцию в сравнении с контрольной группой, потому что heatmap показывает абсолютные значения экспрессии, а не fold change!
select <- order(rowMeans(counts(dds,normalized=TRUE)),
decreasing=TRUE)[1:50]
df <- as.data.frame(colData(dds)[,c("type", "condition")])
plt <- pheatmap(assay(rlt)[select,],
cluster_rows = TRUE,
show_rownames = TRUE,
cluster_cols = TRUE,
annotation_col = df,
fontsize_row = 6)
plt
ggsave("./pictures/Plot a heatmap of 50 most expressed genes.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
Plot of the distance between samples heatmap Расчет расстояний между образцами • Обычно используется евклидово расстояние (по умолчанию в DESeq2). • Оно вычисляется по нормализованным данным экспрессии (rlog() или vst()). • Чем меньше расстояние — тем более похожи образцы.
sampleDists <- dist(t(assay(rlt)))
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(rlt$condition, rlt$type, sep="_type")
colnames(sampleDistMatrix) <- paste(rlt$condition, rlt$type, sep="_type")
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")) )(255)
plt <- pheatmap(sampleDistMatrix,
clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean",
color = colors)
plt
ggsave("./pictures/Plot of the distance between samples heatmap.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
coldata_150 <- coldata[coldata$type == 150, ]
rownames(coldata_150) <- coldata_150$sample
coldata_150
common_samples_150 <- intersect(colnames(counts), coldata_150$samples)
counts_150 <- counts[, c(counts$miRNA, common_samples)]
counts_150 <- counts_150[, rownames(coldata_150)] #ранжирую по колонки в counts так же как и названия строк в coldata_150
head(counts_150)
coldata_150$condition <- relevel(factor(coldata_150$condition), ref = "before")
modelMatrix <- model.matrix(~ 0 + patient + condition , coldata)
modelMatrix
patient15 patient17 patient29 conditionafter
29.1p16_S39_R1_001 0 0 1 0
15.1p16_S33_R1_001 1 0 0 0
17.1p16_S35_R1_001 0 1 0 0
29.1p150_S40_R1_001 0 0 1 0
15.1p150_S34_R1_001 1 0 0 0
17.1p150_S36_R1_001 0 1 0 0
29.7p16_S41_R1_001 0 0 1 1
15.7p16_S31_R1_001 1 0 0 1
17.7p16_S37_R1_001 0 1 0 1
15.7p150_S32_R1_001 1 0 0 1
29.7p150_S42_R1_001 0 0 1 1
17.7p150_S38_R1_001 0 1 0 1
attr(,"assign")
[1] 1 1 1 2
attr(,"contrasts")
attr(,"contrasts")$patient
[1] "contr.treatment"
attr(,"contrasts")$condition
[1] "contr.treatment"
Создаем DESeqDataSet из матрицы каунтов
dds_150 <- DESeqDataSetFromMatrix(countData = counts_150,
colData = coldata_150,
design = ~ 0 + patient + condition)
converting counts to integer mode
dds_150$condition <- relevel(dds_150$condition, ref = "before")
dds_150
class: DESeqDataSet
dim: 913 6
metadata(1): version
assays(1): counts
rownames(913): Hsa-Let-7-P1a_3p* Hsa-Let-7-P1a_5p/P2a1_5p/P2a2_5p ... Hsa-Mir-9851_3p Hsa-Mir-9851_5p*
rowData names(0):
colnames(6): 29.1p150_S40_R1_001 15.1p150_S34_R1_001 ... 29.7p150_S42_R1_001 17.7p150_S38_R1_001
colData names(4): sample condition type patient
Фильтрация
dim(dds_150)
[1] 913 6
smallestGroupSize <- 3
keep <- rowSums(counts(dds_150) >= 10) >= smallestGroupSize
dds_150 <- dds_150[keep,]
dim(dds_150)
[1] 240 6
Run Differential Expression Analysis for 150 type
dds_150 <- DESeq(dds_150, fitType = "parametric")
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
plotDispEsts(dds_150)
res_150 <- results(dds_150, contrast=c("condition", "before", "after"))
res_150
log2 fold change (MLE): condition before vs after
Wald test p-value: condition before vs after
DataFrame with 240 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue padj
<numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
Hsa-Let-7-P1a_5p/P2a1_5p/P2a2_5p 9637.8166 0.3135594 0.384984 0.814474 0.4153737 0.951261
Hsa-Let-7-P1b_5p 578.3774 0.7372962 0.710691 1.037435 0.2995332 0.930944
Hsa-Let-7-P1c_5p 390.5366 1.3620176 0.717510 1.898257 0.0576623 0.494248
Hsa-Let-7-P2a1_3p* 18.6002 6.0987421 2.681041 2.274766 0.0229200 0.347014
Hsa-Let-7-P2a3_5p 10822.5933 0.0444091 0.395465 0.112296 0.9105888 0.990665
... ... ... ... ... ... ...
Hsa-Mir-95-P1_3p 22.921 -3.512453 2.149234 -1.634281 0.1021999 0.655709
Hsa-Mir-95-P2_3p 288.942 0.282295 0.789321 0.357643 0.7206107 0.955427
Hsa-Mir-96-P1_5p 193.937 0.657772 0.815933 0.806159 0.4201511 0.951261
Hsa-Mir-96-P2_5p 5249.788 -0.405491 0.405309 -1.000450 0.3170930 0.930944
Hsa-Mir-96-P3_5p 205.622 2.158153 1.010105 2.136563 0.0326336 0.391603
MA plot
Фильтрация точек с низким средним экспрессированием (по baseMean). • Обычно отсекаются baseMean < 1. 2. Определение значимых генов (синие точки): • Используется критерий padj < 0.1 по умолчанию, а не < 0.05!
tiff("./pictures/PlotMA_standart_padj_0.05_type150.tiff",
width = 8, height = 6, units = "in", res = 300, bg = "white")
plotMA(res_150, alpha = 0.05, ylim = c(-8, 8))
dev.off()
null device
1
plotMA(res_150, alpha = 0.05, ylim = c(-8, 8))
Кастомный MA plot по p-value
res_df <- res_150 %>%
as.data.frame() %>%
mutate(color = case_when(
padj < 0.05 ~ "padj < 0.05",
pvalue < 0.05 ~ "pvalue < 0.05",
TRUE ~ "All micro-RNA"
))
plt <- ggplot(res_df, aes(x = baseMean, y = log2FoldChange, color = color)) +
geom_point(alpha = 0.7, size = 1) +
geom_hline(yintercept = 0, linetype = "solid", color = "gray40", size = 1.5) +
scale_color_manual(values = c("All micro-RNA" = "gray70",
"pvalue < 0.05" = "blue",
"padj < 0.05" = "red")) +
scale_x_log10(labels = scales::scientific) +
theme_minimal() +
labs(x = "mean of normalized counts",
y = "log fold change",
color = NULL) # Название легенды
plt
ggsave("./pictures/Сustom MAplot_type150.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
Значимые результаты
signres_150 <- results(dds_150, contrast=c("condition", "before", "after"), alpha=0.05)
summary(signres_150)
out of 240 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up) : 1, 0.42%
LFC < 0 (down) : 3, 1.2%
outliers [1] : 0, 0%
low counts [2] : 0, 0%
(mean count < 6)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
Let’s arranged it by log2FoldChange:
order_indices <- order(-res_150$log2FoldChange)
res_150[order_indices, ]
log2 fold change (MLE): condition before vs after
Wald test p-value: condition before vs after
DataFrame with 240 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue padj
<numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
Hsa-Mir-150_3p* 41.0827 6.48415 2.05853 3.14990 0.001633280 0.07839744
Hsa-Mir-328_3p 92.7095 6.41844 1.64997 3.89004 0.000100228 0.00801828
Hsa-Mir-145_3p* 44.0673 6.31701 2.05972 3.06693 0.002162671 0.08086155
Hsa-Let-7-P2a1_3p* 18.6002 6.09874 2.68104 2.27477 0.022919964 0.34701443
Hsa-Mir-33-P2_5p* 17.7651 6.08611 2.76046 2.20475 0.027471976 0.34701443
... ... ... ... ... ... ...
Hsa-Mir-133-P1_3p/P2_3p/P3_3p 778.6034 -4.27149 0.634382 -6.73331 1.65852e-11 1.99022e-09
Hsa-Mir-431_5p 18.5437 -4.69940 2.595322 -1.81072 7.01841e-02 5.80834e-01
Hsa-Mir-219-P1_5p* 11.4359 -4.76811 3.574814 -1.33381 1.82267e-01 8.27845e-01
Hsa-Mir-499_5p 68.1969 -5.58010 1.488728 -3.74824 1.78082e-04 1.06849e-02
Hsa-Mir-208-P2_3p 544.5306 -7.31750 1.032542 -7.08688 1.37167e-12 3.29202e-10
Visualisation for the first gene
#plotCounts(dds_150, gene=which.max(res_150$log2FoldChange), intgroup="condition")
plotCounts(dds_150, gene=which.min(res_150$padj), intgroup="condition")
#plotCounts(dds, gene=rownames(res)[which.min(res$padj[which.max(res$log2FoldChange)])], intgroup="condition")
Volcano plot
plt <- EnhancedVolcano(res_150,
lab = rownames(res_150),
x = "log2FoldChange",
y = "padj",
pCutoff = 0.05,
FCcutoff = 1,
labSize = 3.0,
boxedLabels = FALSE,
col = c('black', '#CBD5E8', '#B3E2CD', '#FDCDAC'),
colAlpha = 1,
title = NULL,
subtitle = NULL)
plt
ggsave("./pictures/Volcano plot_150type.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
rlog трансформация • Черные точки – стандартное отклонение отдельных генов. • Красная линия – сглаженный тренд зависимости SD от среднего значения экспрессии. • Если красная линия наклонена вверх → стандартное отклонение растёт с увеличением среднего (плохая нормализация). • Если красная линия примерно горизонтальна → нормализация сработала хорошо.
rlt_150 <- rlog(dds_150)
meanSdPlot(assay(rlt_150)) #показывает, как изменяется стандартное отклонение в зависимости от среднего значения экспрессии.
PCA plot PCA – это метод снижения размерности, который показывает различия между образцами. Строится на основе различий в экспрессии по всем генам, даже если они не являются значимо дифференциально экспрессированными. • Оно не накладывает статистических порогов вроде padj < 0.05 или |log2FoldChange| > 1, а просто ищет наибольшие источники вариабельности среди всех измерений. • Поэтому даже небольшие изменения экспрессии могут формировать кластеры и различать группы в PCA, если эти изменения систематичны между образцами.
pcaData <- plotPCA(rlt_150, intgroup=c("condition", "patient"), returnData = TRUE)
using ntop=500 top features by variance
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, shape = patient, color = condition)) +
geom_point(size = 3) +
xlab(paste0("PC1: ", percentVar[1], "%")) +
ylab(paste0("PC2: ", percentVar[2], "%")) +
coord_fixed() +
theme_bw() +
scale_color_brewer(palette = "Set2")
assay(rlt_150) <- limma::removeBatchEffect(assay(rlt_150),
batch = colData(dds_150)[,'patient'])
pcaData <- plotPCA(rlt_150, intgroup=c("condition", "patient"), returnData = TRUE)
using ntop=500 top features by variance
percentVar <- round(100 * attr(pcaData, "percentVar"))
plt <- ggplot(pcaData, aes(PC1, PC2, shape = patient, color = condition)) +
geom_point(size = 3) +
xlab(paste0("PC1: ", percentVar[1], "%")) +
ylab(paste0("PC2: ", percentVar[2], "%")) +
coord_fixed() +
theme_bw() +
scale_color_brewer(palette = "Set2")
plt
ggsave("./pictures/PCA plot for type 150 after removing donor effect.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
Plot a heatmap of diff expressed genes
res_sign_150 <- subset(res_150, padj < 0.05 & !is.na(padj) & abs(log2FoldChange) > 1.0)
res_sign_150 <- res_sign_150[order(res_sign_150$log2FoldChange, decreasing = TRUE), ]
sig_genes <- rownames(res_sign_150) # Получаем имена генов, которые прошли фильтрацию
de_mat <- assay(rlt_150)[sig_genes, ]
datamatrix <- de_mat
annotation_col <- data.frame(condition = coldata_150$condition)
rownames(annotation_col) <- colnames(datamatrix)
annotation_colors <- list(
condition = c("before" = "#FFCC00", "after" = "#3399FF")
)
plt <- pheatmap(datamatrix,
cluster_rows = TRUE,
show_rownames = TRUE,
cluster_cols = TRUE,
annotation_col = annotation_col,
annotation_colors = annotation_colors,
display_numbers = TRUE,
legend = TRUE,
fontsize = 15)
plt
ggsave("./pictures/Heatmap of diff expressed genes_type150.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
Plot of the distance between samples heatmap Расчет расстояний между образцами • Обычно используется евклидово расстояние (по умолчанию в DESeq2). • Оно вычисляется по нормализованным данным экспрессии (rlog() или vst()). • Чем меньше расстояние — тем более похожи образцы.
sampleDists_150 <- dist(t(assay(rlt_150)))
sampleDistMatrix_150 <- as.matrix(sampleDists_150)
rownames(sampleDistMatrix_150) <- paste(rlt_150$condition, rlt_150$patient, sep="_patient")
colnames(sampleDistMatrix_150) <- paste(rlt_150$condition, rlt_150$patient, sep="_patient")
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")) )(255)
plt <- pheatmap(sampleDistMatrix_150,
clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean",
fontsize = 12,
legend = FALSE,
display_numbers = TRUE,
color = colors)
plt
ggsave("./pictures/Plot of the distance between samples_type150.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
up_150 <- res_sign_150 %>%
as.data.frame() %>%
filter(log2FoldChange > 0)
down_150 <- res_sign_150 %>%
as.data.frame() %>%
filter(log2FoldChange < 0)
rownames(up_150)
[1] "Hsa-Mir-328_3p"
rownames(down_150)
[1] "Hsa-Mir-133-P1_3p/P2_3p/P3_3p" "Hsa-Mir-499_5p" "Hsa-Mir-208-P2_3p"
перевела вручную: https://mirgenedb.org/browse/hsa Hsa-Mir-328 = hsa-mir-328 Eutheria — клада зверей, включающая плацентарных и различные вымершие инфраклассы. Hsa-Mir-133-P1 = hsa-mir-133a-1 Gnathostomata Hsa-Mir-133-P2 = hsa-mir-133b Gnathostomata Hsa-Mir-133-P3 = hsa-mir-133a-2 Gnathostomata есть только hsa-miR-133a-3p Hsa-Mir-499 = hsa-mir-499a Vertebrata Hsa-Mir-208-P2 hsa-mir-208a Vertebrata • miRBase: https://www.mirbase.org/ • MirGeneDB: https://mirgenedb.org/
mirna_names_up <- c("hsa-miR-328-3p")
mirna_names_down <- c("hsa-miR-133a-3p", "hsa-miR-133a-3p", "hsa-miR-208a-3p", "hsa-miR-499a-5p")
Конвертация в MIMATID
converted_mirna_up <- miRNAVersionConvert(mirna_names_up)
converted_mirna_down <- miRNAVersionConvert(mirna_names_down)
converted_mirna_up
converted_mirna_down
Запрос таргетов из базы multiMiR
targets150_down <- unique(get_multimir(org = "hsa", mirna = converted_mirna_up$Accession, table = "validated")@data$target_symbol)
Searching mirecords ...
Searching mirtarbase ...
Searching tarbase ...
targets150_up <- unique(get_multimir(org = "hsa", mirna = converted_mirna_down$Accession, table = "validated")@data$target_symbol)
Searching mirecords ...
Searching mirtarbase ...
Searching tarbase ...
#writeLines(targets_down, "targets_down150_list.txt")
#writeLines(targets_up, "targets_up150_list.txt")
Анализ обогащения, связанный со Biological Processes
# msig_go_bp <- msigdbr(species = "Homo sapiens", category = "C7", subcategory = "IMMUNESIGDB")
#
# GO_enrich_up150_immum <- enricher(gene = targets150_up, TERM2GENE = msig_go_bp[, c("gs_name", "gene_symbol")])
# GO_enrich_down150_immun <- enricher(gene = targets150_down, TERM2GENE = msig_go_bp[, c("gs_name", "gene_symbol")])
GO_enrich_down150_bp <- enrichGO(
gene = targets150_down,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "BP",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
GO_enrich_up150_bp <- enrichGO(
gene = targets150_up,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "BP",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
Визуализация Biological Processes
p1 <- dotplot(GO_enrich_up150_bp, showCategory = 20) +
ggtitle("BP for Upregulated Targets in Vesicles 150") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 8)
)
p2 <- dotplot(GO_enrich_down150_bp, showCategory = 20) +
ggtitle("BP for Downregulated Targets in Vesicles 150") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 8)
)
p1 + p2
combined_plot <- p1 + p2
ggsave("./pictures/GO_enrichment_dotplot_BP_type150.tiff", plot = combined_plot, width = 16, height = 10, dpi = 300)
GO_enrich_UP150_BP <- enrichplot::pairwise_termsim(GO_enrich_up150_bp, method = "JC")
plt <- emapplot(GO_enrich_UP150_BP,
repel = TRUE,
showCategory = 20) +
ggtitle("All processes for UPregulated targets for vesicles 150") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures/GO_enrichment_emapplot_BP_up_type150.tiff", plot = plt, width = 16, height = 10, dpi = 300)
GO_enrich_DOWN150_BP <- enrichplot::pairwise_termsim(GO_enrich_down150_bp, method = "JC")
plt <- emapplot(GO_enrich_DOWN150_BP,
repel = TRUE,
showCategory = 20) +
ggtitle("All processes for DOWNregulated targets for vesicles 150") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures/GO_enrichment_emapplot_BP_down_type150.tiff", plot = plt, width = 16, height = 10, dpi = 300)
GO Enrichment of Cellular Component
# msig_go_bp <- msigdbr(species = "Homo sapiens", category = "C7", subcategory = "IMMUNESIGDB")
#
# GO_enrich_up150_immum <- enricher(gene = targets150_up, TERM2GENE = msig_go_bp[, c("gs_name", "gene_symbol")])
# GO_enrich_down150_immun <- enricher(gene = targets150_down, TERM2GENE = msig_go_bp[, c("gs_name", "gene_symbol")])
GO_enrich_down150_CC <- enrichGO(
gene = targets150_down,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "CC",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
GO_enrich_up150_CC <- enrichGO(
gene = targets150_up,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "CC",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
p1 <- dotplot(GO_enrich_up150_CC, showCategory = 20) +
ggtitle("GO Enrichment of Cellular Component for Upregulated Targets in Vesicles 150") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 12)
)
p2 <- dotplot(GO_enrich_down150_CC, showCategory = 20) +
ggtitle("GO Enrichment of Cellular Component for Downregulated Targets in Vesicles 150") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 12)
)
p1 + p2
combined_plot <- p1 + p2
ggsave("./pictures/GO_enrichment_dotplot_CC_type150.tiff", plot = combined_plot, width = 16, height = 10, dpi = 300)
GO_enrich_UP150_CC <- enrichplot::pairwise_termsim(GO_enrich_up150_CC, method = "JC")
plt <- emapplot(GO_enrich_UP150_CC,
repel = TRUE,
showCategory = 20) +
ggtitle("All processes for UPregulated targets for vesicles 150") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures/GO_enrichment_emapplot_CC_up_type150.tiff", plot = plt, width = 16, height = 10, dpi = 300)
GO_enrich_DOWN150_CC <- enrichplot::pairwise_termsim(GO_enrich_down150_CC, method = "JC")
plt <- emapplot(GO_enrich_DOWN150_CC,
repel = TRUE,
showCategory = 25) +
ggtitle("All processes for DOWNregulated targets for vesicles 150") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures/GO_enrichment_emapplot_CC_down_type150.tiff", plot = plt, width = 16, height = 10, dpi = 300)
GO Enrichment of Molecular Function
GO_enrich_down150_MF <- enrichGO(
gene = targets150_down,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "MF",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
GO_enrich_up150_MF <- enrichGO(
gene = targets150_up,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "MF",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
p1 <- dotplot(GO_enrich_up150_MF, showCategory = 20) +
ggtitle("GO Enrichment of Molecular Function for Upregulated Targets in Vesicles 150") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 12)
)
p2 <- dotplot(GO_enrich_down150_MF, showCategory = 20) +
ggtitle("GO Enrichment of Molecular Functiont for Downregulated Targets in Vesicles 150") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 12)
)
p1 + p2
combined_plot <- p1 + p2
ggsave("./pictures/GO_enrichment_dotplot_MF_type150.tiff", plot = combined_plot, width = 16, height = 10, dpi = 300)
GO_enrich_UP150_MF <- enrichplot::pairwise_termsim(GO_enrich_up150_MF, method = "JC")
plt <- emapplot(GO_enrich_UP150_MF,
repel = TRUE,
showCategory = 25) +
ggtitle("Molecular Function processes for UPregulated targets for vesicles 150") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures/GO_enrichment_emapplot_MF_up_type150.tiff", plot = plt, width = 16, height = 10, dpi = 300)
GO_enrich_DOWN150_MF <- enrichplot::pairwise_termsim(GO_enrich_down150_MF, method = "JC")
plt <- emapplot(GO_enrich_DOWN150_MF,
repel = TRUE,
showCategory = 25) +
ggtitle("Molecular Function processes for DOWNregulated targets for vesicles 150") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures/GO_enrichment_emapplot_MF_down_type150.tiff", plot = plt, width = 16, height = 10, dpi = 300)
coldata_16 <- coldata[coldata$type == 16, ]
rownames(coldata_16) <- coldata_16$sample
coldata_16
common_samples_16 <- intersect(colnames(counts), coldata_16$samples)
counts_16 <- counts[, c(counts$miRNA, common_samples)]
counts_16 <- counts_16[, rownames(coldata_16)] #ранжирую по колонки в counts так же как и названия строк в coldata_150
head(counts_16)
Создаем DESeqDataSet из матрицы каунтов
dds_16 <- DESeqDataSetFromMatrix(countData = counts_16,
colData = coldata_16,
design = ~ 0 + patient + condition)
converting counts to integer mode
dds_16$condition <- relevel(dds_16$condition, ref = "before")
dds_16
class: DESeqDataSet
dim: 913 6
metadata(1): version
assays(1): counts
rownames(913): Hsa-Let-7-P1a_3p* Hsa-Let-7-P1a_5p/P2a1_5p/P2a2_5p ... Hsa-Mir-9851_3p Hsa-Mir-9851_5p*
rowData names(0):
colnames(6): 29.1p16_S39_R1_001 15.1p16_S33_R1_001 ... 15.7p16_S31_R1_001 17.7p16_S37_R1_001
colData names(4): sample condition type patient
Фильтрация
dim(dds_16)
[1] 913 6
smallestGroupSize <- 3
keep <- rowSums(counts(dds_16) >= 10) >= smallestGroupSize
dds_16 <- dds_16[keep,]
dim(dds_16)
[1] 219 6
Run Differential Expression Analysis for 150 type
dds_16 <- DESeq(dds_16, fitType = "parametric")
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
plotDispEsts(dds_16)
res_16 <- results(dds_16, contrast=c("condition", "before", "after"))
res_16
log2 fold change (MLE): condition before vs after
Wald test p-value: condition before vs after
DataFrame with 219 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue padj
<numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
Hsa-Let-7-P1a_5p/P2a1_5p/P2a2_5p 22993.7673 0.1901164 0.224536 0.846707 0.3971586 0.926559
Hsa-Let-7-P1b_5p 2527.3696 0.4961225 0.280481 1.768830 0.0769222 0.575168
Hsa-Let-7-P1c_5p 481.2148 0.5330152 0.461003 1.156209 0.2475958 0.860794
Hsa-Let-7-P2a1_3p* 23.7282 0.4135740 1.966411 0.210319 0.8334185 0.954993
Hsa-Let-7-P2a3_5p 45006.2426 -0.0642043 0.238029 -0.269733 0.7873659 0.940503
... ... ... ... ... ... ...
Hsa-Mir-92-P2c_5p* 25.3899 -0.754468 2.275831 -0.331513 0.7402570 0.931703
Hsa-Mir-95-P2_3p 193.6126 -0.340069 0.662909 -0.512995 0.6079550 0.926559
Hsa-Mir-96-P1_5p 14.4204 -4.216533 2.813830 -1.498503 0.1340025 0.764653
Hsa-Mir-96-P2_5p 4249.9579 -0.537344 0.268063 -2.004539 0.0450123 0.487244
Hsa-Mir-96-P3_5p 121.7907 0.504262 0.806362 0.625355 0.5317382 0.926559
MA plot
Фильтрация точек с низким средним экспрессированием (по baseMean). • Обычно отсекаются baseMean < 1. 2. Определение значимых генов (синие точки): • Используется критерий padj < 0.1 по умолчанию, а не < 0.05!
tiff("./pictures/PlotMA_standart_padj_0.05_type16.tiff",
width = 8, height = 6, units = "in", res = 300, bg = "white")
plotMA(res_16, alpha = 0.05, ylim = c(-8, 8))
dev.off()
null device
1
plotMA(res_16, alpha = 0.05, ylim = c(-8, 8))
Кастомный MA plot по p-value
res_df <- res_16 %>%
as.data.frame %>%
mutate(color = case_when(
pvalue < 0.05 & !is.na(pvalue) & abs(log2FoldChange) > 1 ~ "blue", # Значимые по p-value и диф экспрессированные
TRUE ~ "gray70"
))
plt <- ggplot(res_df, aes(x = baseMean, y = log2FoldChange, color = color)) +
geom_point(alpha = 0.7, size = 1) +
geom_hline(yintercept = 0, linetype = "solid", color = "gray40", size = 1.5) + # Добавляем линию
scale_color_manual(values = c("gray70" = "gray70", "blue" = "blue")) +
scale_x_log10(labels = scales::scientific) +
theme_minimal() +
labs(x = "mean of normalized counts", y = "log fold change") +
theme(legend.position = "none")
plt
ggsave("./pictures/PlotMA_castom_pvalue0.05_type16.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
Значимые результаты
summary(results(dds_16, contrast=c("condition", "before", "after"), alpha=0.05))
out of 219 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up) : 0, 0%
LFC < 0 (down) : 0, 0%
outliers [1] : 0, 0%
low counts [2] : 0, 0%
(mean count < 8)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
Let’s arranged it by log2FoldChange:
order_indices <- order(-res_16$log2FoldChange)
res_16[order_indices, ]
log2 fold change (MLE): condition before vs after
Wald test p-value: condition before vs after
DataFrame with 219 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue padj
<numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
Hsa-Mir-185_5p 14.8310 5.85950 2.69426 2.17481 0.02964445 0.487244
Hsa-Mir-136_5p* 13.3826 5.83711 3.16104 1.84658 0.06480773 0.575168
Hsa-Mir-197_3p 45.5597 4.16951 1.59282 2.61769 0.00885267 0.357073
Hsa-Mir-101-P1_5p* 16.5354 4.08972 2.30004 1.77811 0.07538608 0.575168
Hsa-Mir-10-P3a_5p 37.9479 3.82488 1.69051 2.26256 0.02366292 0.487244
... ... ... ... ... ... ...
Hsa-Mir-154-P17_3p 15.3729 -4.07531 2.60884 -1.56211 0.1182617 0.764653
Hsa-Mir-190-P1_5p 10.5910 -4.10593 3.44201 -1.19289 0.2329137 0.850135
Hsa-Mir-96-P1_5p 14.4204 -4.21653 2.81383 -1.49850 0.1340025 0.764653
Hsa-Mir-128-P1_5p* 15.3208 -4.49032 3.05489 -1.46988 0.1415942 0.764653
Hsa-Mir-197_5p* 17.0795 -5.15605 2.93332 -1.75775 0.0787901 0.575168
Visualisation for the first gene
plotCounts(dds_16, gene=which.max(res_16$log2FoldChange), intgroup="condition")
plotCounts(dds_16, gene=which.min(res_16$pvalue), intgroup="condition")
#plotCounts(dds, gene=rownames(res)[which.min(res$padj[which.max(res$log2FoldChange)])], intgroup="condition")
Volcano plot
plt <- EnhancedVolcano(res_16,
lab = rownames(res_16),
x = "log2FoldChange",
y = "pvalue",
pCutoff = 0.05,
FCcutoff = 1,
labSize = 3.0,
boxedLabels = FALSE,
col = c('black', '#CBD5E8', '#B3E2CD', '#FDCDAC'),
colAlpha = 1,
title = NULL,
subtitle = NULL)
plt
ggsave("./pictures/Volcano plot_based_on_Pvalue_16type.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
rlog трансформация • Черные точки – стандартное отклонение отдельных генов. • Красная линия – сглаженный тренд зависимости SD от среднего значения экспрессии. • Если красная линия наклонена вверх → стандартное отклонение растёт с увеличением среднего (плохая нормализация). • Если красная линия примерно горизонтальна → нормализация сработала хорошо.
rlt_16 <- rlog(dds_16)
meanSdPlot(assay(rlt_16)) #показывает, как изменяется стандартное отклонение в зависимости от среднего значения экспрессии.
pcaData <- plotPCA(rlt_16, intgroup=c("condition", "patient"), returnData = TRUE)
using ntop=500 top features by variance
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, shape = patient, color = condition)) +
geom_point(size = 3) +
xlab(paste0("PC1: ", percentVar[1], "%")) +
ylab(paste0("PC2: ", percentVar[2], "%")) +
coord_fixed() +
theme_bw() +
scale_color_brewer(palette = "Set2")
assay(rlt_16) <- limma::removeBatchEffect(assay(rlt_16),
batch = colData(dds_16)[,'patient'])
pcaData <- plotPCA(rlt_16, intgroup=c("condition", "patient"), returnData = TRUE)
using ntop=500 top features by variance
percentVar <- round(100 * attr(pcaData, "percentVar"))
plt <- ggplot(pcaData, aes(PC1, PC2, shape = patient, color = condition)) +
geom_point(size = 3) +
xlab(paste0("PC1: ", percentVar[1], "%")) +
ylab(paste0("PC2: ", percentVar[2], "%")) +
coord_fixed() +
theme_bw() +
scale_color_brewer(palette = "Set2")
plt
ggsave("./pictures/PCA plot for type 16 after removing donor effect.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
Plot a heatmap of the most expressed genes
res_sign_16 <- subset(res_16, pvalue < 0.05 & !is.na(pvalue) & abs(log2FoldChange) > 1.0)
res_sign_16 <- res_sign_16[order(res_sign_16$log2FoldChange, decreasing = TRUE), ]
sig_genes <- rownames(res_sign_16) # Получаем имена генов, которые прошли фильтрацию
de_mat <- assay(rlt_16)[sig_genes, ]
datamatrix <- de_mat
#datamatrix <- t(scale(t(de_mat)))
annotation_col <- data.frame(condition = coldata_16$condition)
rownames(annotation_col) <- colnames(datamatrix)
annotation_colors <- list(
condition = c("before" = "#FFCC00", "after" = "#3399FF")
)
plt <- pheatmap(datamatrix,
cluster_rows = TRUE,
show_rownames = TRUE,
cluster_cols = TRUE,
annotation_col = annotation_col,
annotation_colors = annotation_colors,
display_numbers = TRUE,
legend = FALSE,
fontsize = 15)
plt
ggsave("./pictures/Heatmap of diff expressed genes_type16.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
Plot of the distance between samples heatmap Расчет расстояний между образцами • Обычно используется евклидово расстояние (по умолчанию в DESeq2). • Оно вычисляется по нормализованным данным экспрессии (rlog() или vst()). • Чем меньше расстояние — тем более похожи образцы.
sampleDists_16 <- dist(t(assay(rlt_16)))
sampleDistMatrix_16 <- as.matrix(sampleDists_16)
rownames(sampleDistMatrix_16) <- paste(rlt_16$condition, rlt_16$patient, sep="_patient")
colnames(sampleDistMatrix_16) <- paste(rlt_16$condition, rlt_16$patient, sep="_patient")
plt <- pheatmap(sampleDistMatrix_16,
clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean",
fontsize = 12,
legend = FALSE,
display_numbers = TRUE,
color = colors)
plt
ggsave("./pictures/Plot of the distance between samples_type16.tiff", plot = plt, width = 8, height = 6, dpi = 300, bg = "white")
up_16 <- res_sign_16 %>%
as.data.frame() %>%
filter(log2FoldChange > 0)
down_16 <- res_sign_16 %>%
as.data.frame() %>%
filter(log2FoldChange < 0)
rownames(up_16)
[1] "Hsa-Mir-185_5p" "Hsa-Mir-197_3p" "Hsa-Mir-10-P3a_5p" "Hsa-Mir-148-P3_5p*" "Hsa-Mir-30-P1a_3p*" "Hsa-Mir-150_3p*" "Hsa-Mir-361_3p*"
[8] "Hsa-Mir-10-P2b_5p" "Hsa-Mir-126_3p*"
rownames(down_16)
[1] "Hsa-Mir-340_5p" "Hsa-Mir-181-P2c_5p" "Hsa-Mir-223_5p*" "Hsa-Mir-30-P1c_3p*"
Переводим в miRBase • miRBase: https://www.mirbase.org/ • MirGeneDB: https://mirgenedb.org/
url <- "https://mirgenedb.org/browse/hsa"
page <- read_html(url)
Парсим таблицу
mir_table <- page %>%
html_element("table") %>%
html_table(fill = TRUE)
mir_table <- mir_table[-c(1:3), c(1,2) ]
colnames(mir_table) <- c("MirGeneDB_ID", "MiRBase_ID")
mir_table$MirGeneDB_ID <- sub(" V", "", mir_table$MirGeneDB_ID)
head(mir_table)
up_16_clean <- sub("_.*", "", row.names(up_16))
up_16_converted <- mir_table$MiRBase_ID[match(up_16_clean, mir_table$MirGeneDB_ID)]
down_16_clean <- sub("_.*", "", row.names(down_16))
down_16_converted <- mir_table$MiRBase_ID[match(down_16_clean, mir_table$MirGeneDB_ID)]
up_16_converted
[1] "hsa-mir-185" "hsa-mir-197" NA "hsa-mir-152" "hsa-mir-30d" "hsa-mir-150" "hsa-mir-361" "hsa-mir-99b" NA
down_16_converted
[1] "hsa-mir-340" "hsa-mir-181d" "hsa-mir-223" NA
Конвертация в MIMATID в итоге заменила NA вручную на самые близкие, но это такая себе практика
NA Hsa-Mir-10-P3a_5p есть два соответствия: Hsa-Mir-10-P1c = hsa-mir-10a Hsa-Mir-10-P3b = hsa-mir-125a
NA Hsa-Mir-126_3p* есть одно соответствие: Hsa-Mir-126-P2 = hsa-mir-126
NA Hsa-Mir-30-P1c_3p* есть три соответствия: Hsa-Mir-30-P1a = hsa-mir-30d Hsa-Mir-30-P1b = hsa-mir-30a Hsa-Mir-30-P1d = hsa-mir-30e
[1] “Hsa-Mir-185_5p” “Hsa-Mir-197_3p” “Hsa-Mir-10-P3a_5p”
“Hsa-Mir-148-P3_5p” ”Hsa-Mir-30-P1a_3p”
“Hsa-Mir-150_3p”
[7] ”Hsa-Mir-361_3p” “Hsa-Mir-10-P2b_5p” “Hsa-Mir-126_3p”
[1] ”Hsa-Mir-340_5p” ”Hsa-Mir-181-P2c_5p” ”Hsa-Mir-223_5p”
“Hsa-Mir-30-P1c_3p*”
MI (MicroRNA Gene ID) — это идентификатор предшественника (precursor) miRNA MIMAT (Mature miRNA ID) — это идентификатор зрелой (mature) miRNA, которая функционирует в клетке
up_16_converted <- c("hsa-miR-185-5p", "hsa-miR-197-3p", "hsa-miR-152-5p", "hsa-miR-30d-3p", "hsa-miR-150-3p", "hsa-miR-361-3p", "hsa-miR-99b-5p", "hsa-miR-126-3p")
down_16_converted <- c("hsa-miR-340-5p", "hsa-miR-181d-5p", "hsa-miR-223-5p")
converted_mirna_up16 <- miRNAVersionConvert(up_16_converted)
converted_mirna_down16 <- miRNAVersionConvert(down_16_converted)
converted_mirna_up16
converted_mirna_down16
Запрос таргетов из базы multiMiR
targets16_up <- unique(get_multimir(org = "hsa", mirna = converted_mirna_up16$Accession, table = "validated")@data$target_symbol)
Searching mirecords ...
Searching mirtarbase ...
Searching tarbase ...
targets16_down <- unique(get_multimir(org = "hsa", mirna = converted_mirna_down16$Accession, table = "validated")@data$target_symbol)
Searching mirecords ...
Searching mirtarbase ...
Searching tarbase ...
Анализ обогащения, связанный со Biological Process
GO_enrich_up16_bp <- enrichGO(
gene = targets16_up,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "BP",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
GO_enrich_down16_bp <- enrichGO(
gene = targets16_down,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "BP",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
p1 <- dotplot(GO_enrich_up16_bp, showCategory = 20) +
ggtitle("BP for Upregulated Targets in Vesicles 16") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 12)
)
p2 <- dotplot(GO_enrich_down16_bp, showCategory = 20) +
ggtitle("BP for Downregulated Targets in Vesicles 16") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 12)
)
combined_plot <- p1 + p2
combined_plot
ggsave("./pictures/GO_enrichment_dotplot_BP_type16.tiff", plot = combined_plot, width = 16, height = 10, dpi = 300)
GO_enrich_UP16_BP <- enrichplot::pairwise_termsim(GO_enrich_up16_bp, method = "JC")
plt <- emapplot(GO_enrich_UP16_BP,
repel = TRUE,
showCategory = 20) +
ggtitle("BP for UPregulated targets for vesicles 16") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures/GO_enrichment_emapplot_BP_up_type16.tiff", plot = plt, width = 16, height = 10, dpi = 300)
GO_enrich_DOWN16_all <- enrichplot::pairwise_termsim(GO_enrich_down16_all, method = "JC")
plt <- emapplot(GO_enrich_DOWN16_all,
repel = TRUE,
showCategory = 20) +
ggtitle("BP for DOWNregulated targets for vesicles 16") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures/GO_enrichment_emapplot_BP_down_type16.tiff", plot = plt, width = 16, height = 10, dpi = 300)
GO Enrichment of Cellular Component
GO_enrich_down16_CC <- enrichGO(
gene = targets16_down,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "CC",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
GO_enrich_up16_CC <- enrichGO(
gene = targets16_up,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "CC",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
p1 <- dotplot(GO_enrich_up16_CC, showCategory = 20) +
ggtitle("6Cellular Component for Upregulated Targets in Vesicles 16") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 12)
)
p2 <- dotplot(GO_enrich_down16_CC, showCategory = 20) +
ggtitle("Cellular Component for Downregulated Targets in Vesicles 16") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 12)
)
p1 + p2
combined_plot <- p1 + p2
ggsave("./pictures/GO_enrichment_dotplot_CC_type16.tiff", plot = combined_plot, width = 16, height = 10, dpi = 300)
GO_enrich_UP16_CC <- enrichplot::pairwise_termsim(GO_enrich_up16_CC, method = "JC")
plt <- emapplot(GO_enrich_UP16_CC,
repel = TRUE,
showCategory = 20) +
ggtitle("Cellular processes for UPregulated targets for vesicles 16") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures/GO_enrichment_emapplot_CC_up_type16.tiff", plot = plt, width = 16, height = 10, dpi = 300)
GO_enrich_DOWN16_CC <- enrichplot::pairwise_termsim(GO_enrich_down16_CC, method = "JC")
plt <- emapplot(GO_enrich_DOWN16_CC,
repel = TRUE,
showCategory = 25) +
ggtitle("Cellular processes for DOWNregulated targets for vesicles 16") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures/GO_enrichment_emapplot_CC_down_type16.tiff", plot = plt, width = 16, height = 10, dpi = 300)
GO Enrichment of Molecular Function
GO_enrich_down16_MF <- enrichGO(
gene = targets16_down,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "MF",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
GO_enrich_up16_MF <- enrichGO(
gene = targets16_up,
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
ont = "MF",
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
p1 <- dotplot(GO_enrich_up16_MF, showCategory = 20) +
ggtitle("Molecular Function for Upregulated Targets in Vesicles 16") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 12)
)
p2 <- dotplot(GO_enrich_down16_MF, showCategory = 20) +
ggtitle("Molecular Functiont for Downregulated Targets in Vesicles 16") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 12)
)
combined_plot <- p1 + p2
combined_plot
ggsave("./pictures/GO_enrichment_dotplot_MF_type16.tiff", plot = combined_plot, width = 16, height = 10, dpi = 300)
GO_enrich_UP16_MF <- enrichplot::pairwise_termsim(GO_enrich_up16_MF, method = "JC")
plt <- emapplot(GO_enrich_UP16_MF,
repel = TRUE,
showCategory = 25) +
ggtitle("Molecular Function processes for UPregulated targets for vesicles 16") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures/GO_enrichment_emapplot_MF_up_type16.tiff", plot = plt, width = 16, height = 10, dpi = 300)
GO_enrich_DOWN16_MF <- enrichplot::pairwise_termsim(GO_enrich_down16_MF, method = "JC")
plt <- emapplot(GO_enrich_DOWN16_MF,
repel = TRUE,
showCategory = 25) +
ggtitle("Molecular Function processes for DOWNregulated targets for vesicles 16") +
theme(
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 3)
)
plt
ggsave("./pictures/GO_enrichment_emapplot_MF_down_type16.tiff", plot = plt, width = 16, height = 10, dpi = 300)